Aron demonstrates how to read data with pandas and plot it with matplotlib.


In [12]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [5]:
# Location of the input CSV; edit this to point at your local copy.
PATH = 'all_immigrant_probs.csv'
# header=None: the first line of the file is treated as data, so pandas
# labels the columns with integers (0, 1, 2, ...).
data = pd.read_csv(filepath_or_buffer=PATH, header=None)

In [6]:
# Preview the first five rows to sanity-check the load.
data.head(n=5)


Out[6]:
0 1 2 3 4 5 6 7 8 9 ... 947 948 949 950 951 952 953 954 955 956
0 -195.0 0.080685 0.083078 0.105677 NaN 0.029739 0.084379 0.011826 0.017002 0.029653 ... 0.129844 0.019499 0.111091 0.072375 0.119258 0.317903 0.066854 0.118523 NaN 0.009324
1 -185.0 0.024921 0.024944 0.075406 NaN 0.037173 0.046541 0.022118 NaN 0.023581 ... 0.125411 0.106249 0.091641 0.076613 0.109993 0.109533 0.075414 0.079401 NaN 0.023035
2 -175.0 0.048208 0.092144 0.042366 0.043871 0.028122 0.111163 0.026134 0.022043 0.125895 ... 0.131912 NaN 0.032582 0.131267 0.083344 0.206429 0.106499 0.124147 NaN 0.019503
3 -165.0 0.062095 0.058378 0.026325 0.023334 0.029739 0.067194 0.036022 0.027748 0.035788 ... 0.117357 NaN 0.131863 0.164130 0.123599 0.068147 0.076054 0.119952 NaN 0.026576
4 -155.0 0.095869 NaN 0.050343 0.025357 0.032289 0.101354 0.037561 NaN 0.025916 ... 0.116511 0.070994 0.069701 NaN 0.117703 0.033291 0.084819 0.147340 NaN 0.028604

5 rows × 957 columns


In [29]:
# Plot the first two users: column 0 supplies the x values and
# columns 1 and 2 are drawn as separate lines.
data.iloc[:, :3].plot(x=0)


Out[29]:
<matplotlib.axes._subplots.AxesSubplot at 0x115671828>

In [13]:
# Plot every column against column 0; the legend is suppressed because
# one entry per column would swamp the figure.
ax = data.plot(x=0, legend=False)
ax.set_xlabel('time from date of immigration')
ax.set_ylabel('depression probability')


Out[13]:
<matplotlib.text.Text at 0x1131d2128>

In [30]:
# Plot the first 50 users.
# Column 0 is used as the x axis, so the slice must extend one column
# further than the number of users wanted: columns 0..50 give 50 lines.
# (Selecting only 50 columns would draw 49 users.)
n_users = 50
ax = data.iloc[:, :n_users + 1].plot(x=0, legend=False)
ax.set_xlabel('time from date of immigration')
ax.set_ylabel('depression probability')


Out[30]:
<matplotlib.text.Text at 0x1160d75f8>

In [38]:
# Plot the mean of every column after column 0 (the x/time column).
# The selection is a column slice, so it must not be bounded by len(data),
# which is the number of ROWS; `1:` takes every remaining column.
# mean(axis=0) averages down the rows, giving one value per column.
# NOTE(review): if the intended figure is the average across users at each
# time point, use `data.set_index(0).mean(axis=1).plot()` instead — confirm.
data.iloc[:, 1:].mean(axis=0).plot()


Out[38]:
<matplotlib.axes._subplots.AxesSubplot at 0x1162a0780>